{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "_ICXkagLFajp"
   },
   "source": [
    "# 一，分析代码运行时间"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "0Fu1N1oGFcAy"
   },
   "source": [
    "## 1，测算代码单次运行时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 平凡方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "e_Bq8hIxkekf"
   },
   "outputs": [],
   "source": [
    "import time\n",
    "tic = time.time()\n",
    "much_job = [x**2 for x in range(1,1000000,3)]\n",
    "toc = time.time()\n",
    "print('used {:.5}s'.format(toc-tic))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 快捷方法(jupyter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "xR3PtPAhkeqk"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "much_job = [x**2 for x in range(1,1000000,3)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2,测算代码重复执行多次平均用时"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 平凡方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from timeit import timeit\n",
    "g = lambda x:x**2+1\n",
    "def main():\n",
    "    return(g(2)**120)\n",
    "\n",
    "#timeit('main()',setup = 'from __main__ import main',number = 10)\n",
    "timeit('main()',globals = {'main':main},number = 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 快捷方法(jupyter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%timeit -n 10 \n",
    "g = lambda x:x**2+1\n",
    "def main():\n",
    "    return(g(2)**120)\n",
    "main()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "ecvv8ZeLFcLk"
   },
   "source": [
    "## 3,按调用函数分析代码运行时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 平凡方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def relu(x):\n",
    "    return(x if x>0 else 0)\n",
    "def main():\n",
    "    result = [relu(x) for x in range(-100000,100000,1)]   \n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import profile\n",
    "profile.run('main()')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "G3zCFPpFFcOB"
   },
   "outputs": [],
   "source": [
    "# 快捷方法(jupyter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%prun main()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4，按行分析代码运行时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 平凡方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "5QvQMQQWFcTM"
   },
   "outputs": [],
   "source": [
    "!pip install line_profiler\n",
    "%load_ext line_profiler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def relu(x):\n",
    "    return(x if x>0 else 0)\n",
    "def main():\n",
    "    result = [relu(x) for x in range(-100000,100000)]   \n",
    "    return result\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from line_profiler import LineProfiler\n",
    "lprofile = LineProfiler(main,relu)\n",
    "lprofile.run('main()')\n",
    "lprofile.print_stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#快捷方法(jupyter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%lprun -f main -f relu main()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "u8e-14scdvtl"
   },
   "source": [
    "# 二，加速你的查找"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "N0_C7S_Fd3nr"
   },
   "source": [
    "## 5,用set而非list进行in查找"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "NqRXw5nBSHRF"
   },
   "outputs": [],
   "source": [
    "data =  (i**2 + 1 for i in range(1000000))\n",
    "list_data = list(data)\n",
    "set_data = set(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 69
    },
    "colab_type": "code",
    "id": "dThLlzyThyzv",
    "outputId": "bb17e4af-5553-4834-9ac9-59b6c960050d"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "1098987 in list_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 69
    },
    "colab_type": "code",
    "id": "PGMI6dl9LQGN",
    "outputId": "e0ad7c1c-bf6f-4873-f8d7-04ca0df4d772"
   },
   "outputs": [],
   "source": [
    "%%time \n",
    "1098987 in set_data "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "02bJNuoGhy4S"
   },
   "source": [
    "## 6,用dict而非两个list进行匹配查找"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "QIhJ2jqnd3xB"
   },
   "outputs": [],
   "source": [
    "list_a = [2*i-1 for  i in range(1000000)]\n",
    "list_b = [i**2 for i in list_a ]\n",
    "dict_ab = dict(zip(list_a,list_b))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 71
    },
    "colab_type": "code",
    "id": "-7FWEPlxZiqZ",
    "outputId": "de9b2950-d388-452c-f758-ab903cc0cbce"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "print(list_b[list_a.index(876567)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 71
    },
    "colab_type": "code",
    "id": "JxqcLaYrZr6S",
    "outputId": "bbcdcc54-54ff-409c-d0e5-3d33a0976624"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "print(dict_ab.get(876567,None))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "dHmwdAQwd3zx"
   },
   "source": [
    "# 三，加速你的循环"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "AJsCEo8Ie0pl"
   },
   "source": [
    "## 7,优先使用for循环而不是while循环"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 71
    },
    "colab_type": "code",
    "id": "ZHcLeWvue1OF",
    "outputId": "a5916574-5f63-4abc-a635-faa7ce8717f6"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "s,i = 0,0\n",
    "while i<10000:\n",
    "    i = i + 1\n",
    "    s = s + i\n",
    "print(s) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 71
    },
    "colab_type": "code",
    "id": "kpiIc-O9fS4K",
    "outputId": "cb935467-79ce-4567-80cf-7781de932617"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "s = 0\n",
    "for i in range(1,10001):\n",
    "    s = s + i \n",
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "eAO9DML7e1ZK"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "UPn78KYGd32B"
   },
   "source": [
    "## 8,循环体中避免重复运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "g0_Gfulkem9s"
   },
   "outputs": [],
   "source": [
    "a = [i**2+1 for i in range(2000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 53
    },
    "colab_type": "code",
    "id": "nrdA3k9pVACJ",
    "outputId": "4304fccb-a1a0-4252-ef64-87da076bcace"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "b = [i/sum(a) for i in a]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 53
    },
    "colab_type": "code",
    "id": "hx7ZTNuKT_D6",
    "outputId": "51bbffe9-d07e-486c-9d2d-781e2bc79388"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "sum_a = sum(a)\n",
    "b = [i/sum_a for i in a]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "9RDsEmwIdG_b"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "dkQHLnzMenLa"
   },
   "source": [
    "# 四，加速你的函数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9，用缓存机制加速递归函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "def fib(n):\n",
    "    return(1 if n in (1,2) else fib(n-1)+fib(n-2))\n",
    "print(fib(30))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "from functools import lru_cache\n",
    "\n",
    "@lru_cache(100)\n",
    "def fib(n):\n",
    "    return(1 if n in (1,2) else fib(n-1)+fib(n-2))\n",
    "print(fib(30))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fib.cache_info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "qJgCJH1PetLL"
   },
   "source": [
    "## 10,用循环取代递归函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "def fib(n):\n",
    "    return(1 if n in (1,2) else fib(n-1)+fib(n-2))\n",
    "print(fib(30))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 53
    },
    "colab_type": "code",
    "id": "Tv7zkwbK1Qci",
    "outputId": "4fbe18c7-9ede-4b7b-c4d6-bc84042a031e"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "def fib(n):\n",
    "    if n in (1,2):\n",
    "        return(1)\n",
    "    a,b = 1,1\n",
    "    for i in range(2,n):\n",
    "        a,b = b,a+b\n",
    "    return(b)\n",
    "print(fib(30))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11， 使用Numba加速Python函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "1q_W5fjO2qEU"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "def my_power(x):\n",
    "    return(x**2)\n",
    "\n",
    "def my_power_sum(n):\n",
    "    s = 0\n",
    "    for i in range(1,n+1):\n",
    "        s = s  + my_power(i)\n",
    "    return(s)\n",
    "\n",
    "print(my_power_sum(1000000))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "from numba import jit\n",
    "\n",
    "@jit\n",
    "def my_power(x):\n",
    "    return(x**2)\n",
    "@jit\n",
    "def my_power_sum(n):\n",
    "    s = 0\n",
    "    for i in range(1,n+1):\n",
    "        s = s  + my_power(i)\n",
    "    return(s)\n",
    "\n",
    "print(my_power_sum(1000000))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "bIUuEmGbeuEh"
   },
   "source": [
    "#  五，使用标准库函数进行加速"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "ljwOF4SBNBua"
   },
   "source": [
    "## 12，使用collections.Counter类加速计数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Ta9S4Vrb108G"
   },
   "outputs": [],
   "source": [
    "data = [x**2%1989 for x in range(2000000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 71
    },
    "colab_type": "code",
    "id": "v3oJzTZfepqb",
    "outputId": "7b30b8cc-c5c4-49db-da63-35635c8c26ea"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "values_count = {}\n",
    "for i in data:\n",
    "    i_cnt = values_count.get(i,0)\n",
    "    values_count[i] = i_cnt + 1\n",
    "print(values_count.get(4,0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 53
    },
    "colab_type": "code",
    "id": "d2y4dVts3Py3",
    "outputId": "7c33de3b-6868-460e-90bf-a6c0d2c1ee36"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "from collections import Counter\n",
    "values_count = Counter(data)\n",
    "print(values_count.get(4,0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "VXWlcglGNB1Z"
   },
   "source": [
    "## 13, 使用collections.ChainMap加速字典合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Z9C0ae5qNB4K"
   },
   "outputs": [],
   "source": [
    "dic_a = {i:i+1 for i in range(1,1000000,2)}\n",
    "dic_b = {i:2*i+1 for i in range(1,1000000,3)}\n",
    "dic_c = {i:3*i+1 for i in range(1,1000000,5)}\n",
    "dic_d = {i:4*i+1 for i in range(1,1000000,7)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 53
    },
    "colab_type": "code",
    "id": "gnUpMt4ogvU-",
    "outputId": "c5da1da9-150b-4dd1-b80a-dbd543589d85"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "result = dic_a.copy()\n",
    "result.update(dic_b)\n",
    "result.update(dic_c)\n",
    "result.update(dic_d)\n",
    "print(result.get(9999,0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 53
    },
    "colab_type": "code",
    "id": "J9gsBFFQg0K3",
    "outputId": "561bbda1-9d85-4004-e159-ef9a2cffafac"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "from collections import ChainMap\n",
    "chain = ChainMap(dic_a,dic_b,dic_c,dic_d)\n",
    "print(chain.get(9999,0))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "eouQJ4dEx7Li"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "m0P9bR84euGy"
   },
   "source": [
    "#  六，使用numpy向量化进行加速"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "9rMqzet_ezNK"
   },
   "source": [
    "## 14,使用np.array代替list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "JI34O1RjezpM"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "a = range(1,1000000,3)\n",
    "b = range(1000000,1,-3)\n",
    "c = [3*a[i]-2*b[i] for i in range(0,len(a))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "XtXkhm_KezvJ"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "import numpy as np \n",
    "array_a = np.arange(1,1000000,3)\n",
    "array_b = np.arange(1000000,1,-3)\n",
    "array_c = 3*array_a - 2*array_b"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 15,使用np.ufunc代替math.func"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import math\n",
    "a = range(1,1000000,3)\n",
    "b = [math.log(x) for x in a]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import numpy as np \n",
    "array_a = np.arange(1,1000000,3)\n",
    "array_b = np.log(array_a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 16,使用np.where代替if "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "array_a = np.arange(-100000,1000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# np.vectorize可以将普通函数转换成支持向量化的函数\n",
    "relu = np.vectorize(lambda x: x if x>0 else 0)\n",
    "array_b = relu(array_a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "relu = lambda x:np.where(x>0,x,0)\n",
    "array_b = relu(array_a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "l2R07upNezyh"
   },
   "source": [
    "# 七，加速你的Pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 17，优先直接使用np.ufunc函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd \n",
    "df = pd.DataFrame(np.random.randint(-10,11,size = (100000,26)),\n",
    "                  columns = list('abcdefghijklmnopqrstuvwxyz'))\n",
    "\n",
    "%time dfresult = df.applymap(lambda x:np.sin(x)+np.cos(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "dfresult = np.sin(df) + np.cos(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "sClqMvcje-np"
   },
   "source": [
    "## 18，避免动态改变DataFrame的行数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame(columns = list('abcdefghijklmnopqrstuvwxyz') )\n",
    "for i in range(10000):\n",
    "    df.loc[i,:] = range(i,i+26)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "uA8k9VIMe-_5"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame(np.zeros((10000,26)),\n",
    "                  columns = list('abcdefghijklmnopqrstuvwxyz'))\n",
    "for i in range(10000):\n",
    "    df.loc[i,:] = range(i,i+26)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 19，使用csv文件读写代替xlsx文件读写"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "df = pd.DataFrame(np.random.randint(-10,11,size=(10000,5)),\n",
    "    columns = list('abced'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "df.to_excel('data.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "df.to_csv('data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 20，使用pandas多进程工具pandarallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame(np.random.randint(-10,11,size=(10000,26)),\n",
    "                 columns = list('abcdefghijklmnopqrstuvwxyz'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "result = df.apply(np.sum,axis = 1) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install pandarallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "from pandarallel import pandarallel \n",
    "pandarallel.initialize(nb_workers=4) \n",
    "result = df.parallel_apply(np.sum,axis = 1)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "9Pje2PJZfFzy"
   },
   "source": [
    "# 八，使用Dask进行加速"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 21，使用dask加速dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd                     \n",
    "\n",
    "df = pd.DataFrame(np.random.randint(0,6,size=(100000000,5)),\n",
    "                 columns = list('abcde'))   \n",
    "\n",
    "%time df.groupby('a').mean()    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install dask "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dask.dataframe as dd\n",
    "df_dask = dd.from_pandas(df,npartitions=40)\n",
    "%time df_dask.groupby('a').mean().compute()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 22，使用dask.delayed应用多进程加速"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "PcMQnVR8fFub"
   },
   "outputs": [],
   "source": [
    "import time\n",
    "def muchjob(x):\n",
    "    time.sleep(5)\n",
    "    return(x**2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "result = [muchjob(i) for i in range(5)]\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "PcMQnVR8fFub"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "from dask import delayed,compute\n",
    "from dask import threaded,multiprocessing\n",
    "values = [delayed(muchjob)(i) for i in range(5)]\n",
    "result = compute(*values,scheduler='multiprocessing')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "yoC9Qp1se_IR"
   },
   "source": [
    "# 九，应用多线程多进程加速"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 23，使用多线程提升IO密集任务效率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%rm -rf *.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "def writefile(i):\n",
    "    with open(str(i)+'.txt','w') as f:\n",
    "        s = ('hello %d'%i)*10000000\n",
    "        f.write(s)\n",
    "        \n",
    "# 串行任务\n",
    "for i in range(30):\n",
    "    writefile(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import threading\n",
    "\n",
    "def writefile(i):\n",
    "    with open(str(i)+'.txt','w') as f:\n",
    "        s = ('hello %d'%i)*10000000\n",
    "        f.write(s)\n",
    "\n",
    "# 多线程任务\n",
    "thread_list = []    \n",
    "for i in range(30):\n",
    "    t =threading.Thread(target=writefile,args=(i,))\n",
    "    t.setDaemon(True)  #设置为守护线程\n",
    "    thread_list.append(t)\n",
    "\n",
    "for t in thread_list:\n",
    "    t.start() #启动线程\n",
    "\n",
    "for t in thread_list:\n",
    "    t.join() #等待子线程结束"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 24，使用多进程提升CPU密集任务效率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 低速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import time\n",
    "\n",
    "def muchjob(x):\n",
    "    time.sleep(5)\n",
    "    return(x**2)\n",
    "\n",
    "#串行任务\n",
    "ans = [muchjob(i) for i in range(8)]\n",
    "print(ans)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 高速方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import time\n",
    "import multiprocessing\n",
    "\n",
    "def muchjob(x):\n",
    "    time.sleep(5)\n",
    "    return(x**2)\n",
    "\n",
    "#多进程任务\n",
    "pool = multiprocessing.Pool(processes=4)\n",
    "result = []\n",
    "for i in range(8):\n",
    "    result.append(pool.apply_async(muchjob, (i,)))\n",
    "pool.close()\n",
    "pool.join()\n",
    "ans = [res.get() for res in result]\n",
    "print(ans)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "18式加速你的Python.ipynb",
   "provenance": [],
   "version": "0.3.2"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
